library(ggplot2)
require(data.table)
library("dplyr")
library("ggpubr") 
require(tidyverse) #for easy data manipulation and visualization
require(caret) #for easy machine learning workflow
library(ncdf4)



##=========directory to replication data and scripts=============
wd1 <- "C:\\Users\\yfan\\...\\NFood_replication\\"
crops = c("Corn", "Wheat", "Soybean", "Cotton", "Rice", "Sugarcane", "Tropical Corn", "Tropical Soybean")
cropname = c("Maize", "Sugarcane", "Wheat", "Rice", "Soy", "Cotton")
pft = c(17,18,  19,20,   23,24,    41,42,    61,62,   67,68,   75,76, 77,78) #rainfed vs. irrigated
case = c("RCP45","RCP45_85LU","RCP85","RCP85_45CO2","RCP85_SAI","RCP85_MSB","RCP85_CCT")
casename = c("RCP45",expression('RCP45'[(SSP5LU)]),"RCP85",expression('RCP85'[(RCP45CO2)]),"RCP85+SAI","RCP85+MSB","RCP85+CCT")



#******************************************************************************
#===================Preparing intermediate data for Fig.1======================
#******************************************************************************

#===========show growing season climate intead of annual climate===============
#load data prepared by Data1.GrowingSeasonClimate-Yield.R
load(paste0(wd1,"/data/RData/GridSample-GrSnClimate-Data-final.Rdata"))
df.all.G1 <- rbind(df.cntl.all,df.scen.all, fill=TRUE)
#merge tropical and temperate corn/soybean
df.all.G1[Crop=="Tropical Corn", Crop:="Corn"]
df.all.G1[Crop=="Tropical Soybean", Crop:="Soybean"]
#make sure all scenarios have the same sample years for consistent bootstrapping
length(unique(df.all.G1[Scenario=="85"]$Sample));length(unique(df.all.G1[Scenario=="45"]$Sample));length(unique(df.all.G1[Scenario=="sai"]$Sample))
length(unique(df.all.G1[Scenario=="85"]$Year));length(unique(df.all.G1[Scenario=="45"]$Year));length(unique(df.all.G1[Scenario=="sai"]$Year))
length(unique(df.all.G1$Scenario));length(unique(df.all.G1$Crop))

df.6crop.wm <- df.all.G1[,.(tsa.gl=weighted.mean(tsa.m, area), radd.gl=weighted.mean(radd.m, area), radi.gl=weighted.mean(radi.m, area), 
                            rh.gl=weighted.mean(rh.m, area), ppt.gl=weighted.mean(ppt.m, area), 
                            yield0.gl=weighted.mean(yield0, area), yield.gl=weighted.mean(yield, area),
                            area.gl=sum(area)), by=.(Scenario, Year, Crop, Irrig)] 

df.5case.wm <- df.all.G1[,.(tsa.gl=weighted.mean(tsa.m, area), radd.gl=weighted.mean(radd.m, area), radi.gl=weighted.mean(radi.m, area), 
                            rh.gl=weighted.mean(rh.m, area), ppt.gl=weighted.mean(ppt.m, area), 
                            yield0.gl=weighted.mean(yield0, area), yield.gl=weighted.mean(yield, area), 
                            area.gl=sum(area)), by=.(Scenario, Year)] 

unique(df.5case.wm$Scenario)
df.5case.wm[Scenario=="45",Case:="RCP45"]
df.5case.wm[Scenario=="85",Case:="RCP85"]
df.5case.wm[Scenario=="sai",Case:="SAI"]
df.5case.wm[Scenario=="msb",Case:="MSB"]
df.5case.wm[Scenario=="cct",Case:="CCT"]
df.5case.wm[,Scenario:=NULL]

rm(df.all.G1, df.cntl.all, df.scen.all)


library(forecast)
#df.5case.ma <- df.5case.wm[, lapply(.SD, ma, order=10), by =.(Case)] #10-year running mean
df.5case.ma <- df.5case.wm[, lapply(.SD, ma, order=5), by =.(Case)] #5-year running mean
df.5case.ma <- df.5case.ma[, lapply(.SD, as.numeric), by =.(Case)]
df.5case.ma$Year <- df.5case.wm$Year  



#==data for preindustrial growing season control climate (1850 crop area not known, so only do global average)
#==for consistancy use NorESM1-ME historical data 1850-1860 mean climatology for piControl
#regrid the 2-degree hist data to 1-degree with nearest neighbor or billinear (CLM5 used mapalgo = "bilinear" to interpolate 2-degree forcing to 1-degree land)
#cdo remapnn,r288x192 climate_data/HIST.clm2.h0.1850-2005.nc climate_data/HIST.clm2.h0.1850-2005-nn-1deg.nc
#cdo remapbil,r288x192 climate_data/HIST.clm2.h0.1850-2005.nc climate_data/HIST.clm2.h0.1850-2005-bil-1deg.nc

##NOTE: unzip the large file under folder /climate_data/HIST.clm2.h0.1850-2005-bil-1deg.zip to /climate_data/*
hist <- nc_open(paste0(wd1, "/data/climate_data/HIST.clm2.h0.1850-2005-bil-1deg.nc")) #regrided 1-degree data
lon <- ncvar_get(hist,"lon")
lat <- ncvar_get(hist,"lat")
nyear <- 30 #calc monthly average climate of 30 years as preindustrial baseline
tsa.noresm <- ncvar_get(hist, "TSA", start=c(1,1,1), count=c(-1,-1, 12*nyear))-273.15 #to celsius


dimnames(tsa.noresm) <- list(lon=lon, lat=lat, mon=rep(1:12, nyear))
tsa.noresm.18501879 <- as.data.table(tsa.noresm, sorted=FALSE, na.rm=FALSE) #keep all original values and sequence (no sort) from array
is.na(tsa.noresm.18501879) <- sapply(tsa.noresm.18501879, is.nan)  #convert NaN (missing) to NA
tsa.noresm.m <-  tsa.noresm.18501879[,.(tsa.m=mean(value, na.rm=TRUE)), by=.(lon,lat,mon)] #monthly average climate of 30 years
tsa.noresm.pi <- array(tsa.noresm.m$tsa.m, dim=c(288,192,12) ) #back to array
nc_close(hist);rm(hist)



#=====estimate piControl growing season climate using same growing periods GrowNH and GrowSH
y.85 <- nc_open(paste0(wd1, "/data/yield_data/RCP85_crop_data.2006-2100.nc"))
lon <- ncvar_get(y.85,"lon")
lat <- ncvar_get(y.85,"lat")
cft <- ncvar_get(y.85,"cft")
area <- ncvar_get(y.85, "CROP_AREA", start=c(1,1,1,1), count=c(length(lon),length(lat),length(cft),1)) #read 1 crop, 1 year
area[area>=9.9692e+36] <- NA #set NA
area.all <- apply(area, c(1, 2), sum, na.rm=TRUE) #all crop area
latmx <- matrix(rep(lat,each=length(lon)),nrow=length(lon),ncol=length(lat)) #matrix of latitude
gridmx <- array(1:(288*192), dim=c(288, 192)) #matrix of grid index

GrowNH=c(0,0,0,0,1,1,1,1,1,1,0,0) #most crops grow 150 days from April/June to Aug/Oct
GrowSH=c(1,1,1,1,0,0,0,0,0,0,1,1) #use average May-Oct for NH, Nov-Apr for SH, as in Levis et al. 2016

clim.1850 <- data.table(area=rep(area.all, 12), #annual to monthly for consistent mask with growing season filter
                        tsa=c(tsa.noresm.pi), #ppt=c(ppt.1850), rh=c(rh.1850),
                        #radd=c(radd.1850), radi=c(radi.1850), 
                        Year=rep(rep(1850, 12),each=288*192), 
                        Lat=rep(latmx, time=12), Grid=rep(gridmx, 12),
                        GrowingN=rep(GrowNH, each=288*192), #growing season filter: Northern Hemisphere
                        GrowingS=rep(GrowSH, each=288*192)) #growing season filter: Southern Hemisphere

clim.GR.NH <- clim.1850[Lat>=0,.(area.wt=mean(area,na.rm=TRUE),tsa.m=mean(tsa,na.rm=TRUE) #ppt.m=mean(ppt,na.rm=T),
                                 #radd.m=mean(radd,na.rm=T),radi.m=mean(radi,na.rm=T), rh.m=mean(rh,na.rm=T)
), by=.(Year,Grid,GrowingN)] #growing season mean using NH filter
clim.GR.SH <- clim.1850[Lat<0,.(area.wt=mean(area,na.rm=TRUE),tsa.m=mean(tsa,na.rm=TRUE) #ppt.m=mean(ppt,na.rm=T),
                                #radd.m=mean(radd,na.rm=T),radi.m=mean(radi,na.rm=T), rh.m=mean(rh,na.rm=T)
), by=.(Year,Grid,GrowingS)] #SH filter
clim1850.GR <- rbind(clim.GR.NH[GrowingN==1], clim.GR.SH[GrowingS==1], fill=TRUE)
clim1850.GR <- clim1850.GR[order(Year,Grid), -c("GrowingN", "GrowingS")]
rm(clim.GR.NH, clim.GR.SH)


#piControl growing season average climate, assuming same total crop area as in 2006
clim1850.GR.wm <-clim1850.GR[,.(tsa.gl=weighted.mean(tsa.m, area.wt, na.rm = T), 
                                #radd.gl=weighted.mean(radd.m, area.wt), radi.gl=weighted.mean(radi.m, area.wt), 
                                #rh.gl=weighted.mean(rh.m, area.wt), ppt.gl=weighted.mean(ppt.m, area.wt), 
                                area.gl=sum(area.wt)), by=.(Year)] 



#======background global annual average TSA trends (without weighting by crop and growing season)
library(raster)
TSA.HIST <- brick((paste0(wd1, "/data/climate_data/HIST.clm2.h0.1850-2005-annual.nc")), varname="TSA")
AREA.HIST <- raster(paste0(wd1, "/data/land_cover/HIST.2deg.landarea.nc"), varname="area") #land frac and area static in the landuse.timeseries file
Landfrac.HIST <- raster(paste0(wd1, "/data/land_cover/HIST.2deg.landarea.nc"), varname="landfrac")
landarea <- AREA.HIST*Landfrac.HIST
TSA.HIST.w <- overlay(TSA.HIST, landarea, fun=function(x,y){(x*y)})
hist.tsa.wm <- cellStats(TSA.HIST.w, stat='sum', na.rm=TRUE)/cellStats(landarea, stat='sum', na.rm=TRUE) #global land weighted annual mean 
hist.tsa.ref <- mean(hist.tsa.wm[137:156]) - 273.15 #global mean of 1986-2005
piControl.tsa.gl <- mean(hist.tsa.wm[1:30]) - 273.15 #global mean of 1850-1879

rm(TSA.HIST,AREA.HIST,Landfrac.HIST,landarea,TSA.HIST.w)

#RCP85 and RCP45 have the same land frac and area for each grid cell though their PFT fractions are different
AREA <- raster(paste0(wd1, "/data/land_cover/RCP85.1deg.landarea.nc"), varname="area") #land frac and area static in the landuse.timeseries file
Landfrac <- raster(paste0(wd1, "/data/land_cover/RCP85.1deg.landarea.nc"), varname="landfrac")
TSA.85 <- brick((paste0(wd1, "/data/climate_date/RCP85.1deg.clm5.h0.2006-2100.climate-annual.nc")), varname="TSA")
TSA.45 <- brick((paste0(wd1, "/data/climate_date/RCP45.1deg.clm5.h0.2006-2100.climate-annual.nc")), varname="TSA")
TSA.SAI <- brick((paste0(wd1, "/data/climate_date/RCP85_SAI.1deg.clm5.h0.2020-2100.climate-annual.nc")), varname="TSA")
TSA.MSB <- brick((paste0(wd1, "/data/climate_date/RCP85_MSB.1deg.clm5.h0.2020-2100.climate-annual.nc")), varname="TSA")
TSA.CCT <- brick((paste0(wd1, "/data/climate_date/RCP85_CCT.1deg.clm5.h0.2020-2100.climate-annual.nc")), varname="TSA")
TSA.85.w <- overlay(TSA.85, AREA*Landfrac, fun=function(x,y){(x*y)})
TSA.45.w <- overlay(TSA.45, AREA*Landfrac, fun=function(x,y){(x*y)})
TSA.SAI.w <- overlay(TSA.SAI, AREA*Landfrac, fun=function(x,y){(x*y)})
TSA.MSB.w <- overlay(TSA.MSB, AREA*Landfrac, fun=function(x,y){(x*y)})
TSA.CCT.w <- overlay(TSA.CCT, AREA*Landfrac, fun=function(x,y){(x*y)})
tsa.85.wm <- cellStats(TSA.85.w, stat='sum', na.rm=TRUE)/cellStats(AREA*Landfrac, stat='sum', na.rm=TRUE) #global weighted mean 
tsa.45.wm <- cellStats(TSA.45.w, stat='sum', na.rm=TRUE)/cellStats(AREA*Landfrac, stat='sum', na.rm=TRUE) #global weighted mean 
tsa.SAI.wm <- cellStats(TSA.SAI.w, stat='sum', na.rm=TRUE)/cellStats(AREA*Landfrac, stat='sum', na.rm=TRUE) #global weighted mean 
tsa.MSB.wm <- cellStats(TSA.MSB.w, stat='sum', na.rm=TRUE)/cellStats(AREA*Landfrac, stat='sum', na.rm=TRUE) #global weighted mean 
tsa.CCT.wm <- cellStats(TSA.CCT.w, stat='sum', na.rm=TRUE)/cellStats(AREA*Landfrac, stat='sum', na.rm=TRUE) #global weighted mean 
rm(AREA,Landfrac,TSA.85,TSA.45,TSA.SAI,TSA.MSB,TSA.CCT,TSA.85.w,TSA.45.w,TSA.SAI.w,TSA.MSB.w,TSA.CCT.w)

TSA.gl <- data.table(tsa.gl=c(tsa.45.wm[-95], tsa.85.wm[-95], c(rep(NA,14), tsa.SAI.wm[-81]), c(rep(NA,14), tsa.MSB.wm[-81]), c(rep(NA,14), tsa.CCT.wm[-81]))-273.15,
                     Case=rep(c("RCP45", "RCP85", "SAI", "MSB", "CCT"), each=94), Year=rep(2006:2099, 5))

library(forecast)
#5-yr running mean
TSA.gl.ma <- data.table(tsa.gl=c(ma(tsa.45.wm[-95],5), ma(tsa.85.wm[-95],5), ma(c(rep(NA,14), tsa.SAI.wm[-81]),5),
                                 ma(c(rep(NA,14), tsa.MSB.wm[-81]),5), ma(c(rep(NA,14), tsa.CCT.wm[-81]),5))-273.15 ,
                        Case=rep(c("RCP45", "RCP85", "SAI", "MSB", "CCT"), each=94), Year=rep(2006:2099, 5))


save(list = c("clim1850.GR.wm", "df.5case.wm", "df.5case.ma", "piControl.tsa.gl", "TSA.gl", "TSA.gl.ma"), 
     file = paste0(wd1,"/data/Rdata/Fig1.GlobalAverage-GrowingSeasonClimate.Rdata"))


